Next | Prev | Up | Top | Contents | Index

Reading the Cycle Counter

The program in Example A-2 shows how to map the high-precision cycle counter into memory and sample it. The file compiles to a library of the following functions:

mapTheTimer() Uses mmap() to map the cycle counter into the address space. Returns the unit value of the timer in picoseconds; for example, it returns 21000 on a Challenge, where the timer unit value is 21 nanoseconds.
timerBitCount() Returns the number of bits of precision in the timer, which varies with the CPU board type: either 32 or 64 bits.
readTimer32() Returns the least-significant (or only) word of the timer value.
readTimer64() Returns the timer value as a 64-bit unsigned integer (extended with 0-bits when necessary).
main() Compiled only when variable UNIT_TEST is set, contains code to exercise the preceding functions.

Example A-2 : Functions to Map and Read the Cycle Counter

/*****************************************************************************
||
|| The functions in this module provide access to the free-running timer
|| on the CPU board of certain SGI systems.
||
|| timerBitCount()
||
|| Returns the number of bits of data in the timer, as reported
|| by syssgi(SGI_CYCLECNTR_SIZE):
||    0 error reported by syssgi -- probably no timer in this machine
||   32 in an Indy or Crimson
||   64 in a Challenge, Onyx, and other big machines.
||
|| mapTheTimer()
||
|| This function tests the hardware environment. If the current system has
|| a timer, the function tries to map it into memory.  Errors can include:
||   * 0 returned by timerBitCount()
||   * error returned by syssgi(SGI_QUERY_CYCLECNTR)
||   * error returned by mmap(2)
|| When there is no error, the function returns a positive integer which is
|| the number of picoseconds represented by one unit increment of the timer.
|| In the event of an error, the function returns 0, and errno is set to
|| some error code.
||   mapTheTimer() can be called multiple times without harm.  To convert
|| its returned value to a fraction of a second, convert to double and
|| multiply by 1e-12.
||
|| readTimer32()
||   
|| This function calls mapTheTimer(), if it has not been called already.
|| Thus the first attempt to read the clock will map it if necessary.
|| If the timer has been mapped, its least-significant bits are returned
|| as an unsigned 32-bit integer.
||   * if mapTheTimer() failed, the returned value is always 0
||   * if the timer has 32-bit precision, the returned value is
||     the whole timer value
||   * if the timer has 64-bit precision (e.g. Challenge), the returned
||     value is the low-order word.
||
|| readTimer64()
||
|| This function is like readTimer32(), except that it returns an unsigned
|| 64-bit integer.
||   * if mapTheTimer() failed, the returned value is always 0
||   * if the timer has 32-bit precision, the returned value is
||     the whole timer value, extended with high-order 0-bits
||   * if the timer has 64-bit precision, the returned value is the whole
||     timer value.  The 64-bit timer is sampled in such a way as to
||     compensate for rollover while minimizing bus traffic.
||
|| main()
||
|| Compiled only when UNIT_TEST is defined, provides a functional test
|| platform for the above functions.
||
|| NOTE: in two of these routines we assume that this machine is operating
|| in big-endian mode, such that the least-significant 32 bits of a 
|| long-long are at the higher word address.
||
*****************************************************************************/
#include <stddef.h>         /* for NULL */
#include <fcntl.h>          /* for O_RDONLY and open() */
#include <unistd.h>         /* for getpagesize() */
#include <sys/mman.h>       /* for constants used with mmap() */
#include <sgidefs.h>        /* for __psint_t, __uint*_t, and ABI defs */
#include <sys/syssgi.h>     /* for syssgi(), SGI_QUERY_CYCLECNTR */
#include <errno.h>          /* for errno global */
/*****************************************************************************
|| The following globals are set up by mapTheTimer() the first time called.
||   timerMapAddress == NULL means mapTheTimer() has never been called
||                   == -1 means mapTheTimer() called and failed
||                   else it points to the timer in memory
||   The data type (void *) is coerced to __uint32_t or __uint64_t in use.
||
|| The "volatile" qualifier applies to the data that timerMapAddress points
|| to (not to the pointer variable itself).  It keeps the compiler from
|| optimizing away successive references to the timer, but only as long as
|| the pointer type used to dereference it is also volatile-qualified.
||
||   timerPicoSecs   == 0 means the timer has not been mapped successfully
||                   else is the picosecond unit value reported by
||                   syssgi(SGI_QUERY_CYCLECNTR)
||
||   timerPrecision  == value returned by syssgi(SGI_CYCLECNTR_SIZE), 
||                   but as this value is needed in the timer-reading
||                   functions, it is cached, so as to avoid a system call
||                   every time we read the clock.
||
|| If this code was redone in C++ (not a bad idea, feel free) these would
|| be class variables.
*****************************************************************************/
/* True once mapTheTimer() has succeeded (timerPicoSecs is then nonzero). */
#define TIMER_IS_MAPPED (0 != timerPicoSecs)
/* True once mapTheTimer() has run at all, whether it succeeded or failed. */
#define TIMER_MAP_ATTEMPTED (NULL != timerMapAddress)
/* Address of the mapped timer; NULL = never tried, -1 = tried and failed. */
static volatile void * timerMapAddress = NULL;
/* Picoseconds per timer tick; 0 until the timer is mapped successfully. */
static unsigned int timerPicoSecs = 0;
/* Cached result of syssgi(SGI_CYCLECNTR_SIZE); 0 until the timer is mapped. */
static unsigned int timerPrecision = 0;
unsigned int
mapTheTimer()
{
__uint32_t  timerUnits = 0; /* receives timer picosecond unit value */
__psint_t   timerPhysAddr;  /* receives timer absolute address */
__psint_t   timerPhysVPN;   /* timerPhysAddr masked to a page boundary */
__psint_t   addrMask;       /* page offset bit mask */
int         fdMem;          /* file descriptor for /dev/mmem */
    if ( ! TIMER_MAP_ATTEMPTED) /* first time through this code */
    {
        /*
        || Get the physical address of the clock in full. If there
        || is no cycle counter on this machine, syssgi returns -1.
        */
        timerPhysAddr = syssgi(SGI_QUERY_CYCLECNTR, &timerUnits);
        if ((__psint_t)-1 != timerPhysAddr) /* we have a timer */
        {
            /*
            || Trim out the offset from the address leaving the
            || page number part of the address. (VPN == virtual page number)
            */
            addrMask = getpagesize() - 1;
            timerPhysVPN = timerPhysAddr & ~addrMask;
            /*
            || Map the page containing the clock's address into the virtual
            || address space of this process.
            */
            fdMem = open("/dev/mmem", O_RDONLY);
            timerMapAddress = (void *) mmap(
                NULL,               /* addr = 0, don't care it goes */
                addrMask,           /* len = pagesize - 1 */
                PROT_READ,          /* prot = read-only */
                MAP_PRIVATE,        /* changes are unshared (n.a.) */
                fdMem,              /* map base is physical memory */
                (off_t)timerPhysVPN /* source address to map */
                );
            if ((__psint_t)-1 != (__psint_t)timerMapAddress)
            {
                /*
                || mmap() succeeded, cache info in global variables.
                */
                timerPicoSecs = timerUnits;
                timerPrecision = syssgi(SGI_CYCLECNTR_SIZE);
                /*
                || Restore any nonzero offset bits to mapped page address.
                */
                timerMapAddress = (void*) (
                    ((__psint_t)timerMapAddress) /* addr as int */
                    | (timerPhysAddr & addrMask)  /* plus offset bits */
                    );
            }
            else
                ; /* mmap() failed, timerMapAddress == -1, errno set */
        } /* end syssgi() successful */
        else
        {
            timerMapAddress = (void *)-1; /* syssgi error, no timer (?) */
        }
    } /* end attempting to initialize */
    return timerPicoSecs;
}
/*
|| timerBitCount() -- report the width of the cycle counter in bits.
||
|| Returns the cached timerPrecision value when the timer is mapped.  If no
|| mapping attempt has been made yet, one is made first.  Returns 0 when an
|| earlier attempt has already failed.
*/
unsigned int
timerBitCount()
{
    if ( ! TIMER_IS_MAPPED )
    {
        if (TIMER_MAP_ATTEMPTED)
            return 0;           /* tried before and failed */
        mapTheTimer();          /* first use: try to map it now */
    }
    return timerPrecision;
}
/*****************************************************************************
||
|| In both of the following routines, one goal is to minimize the number of
|| references to the mapped timer.  Reason: each such reference is an 
|| uncached memory reference plus a bus access, taking at least 1 usec and
|| possibly more depending on the machine.  Unnecessary references to the
|| timer should be avoided when possible.
||
|| If the timer has 64 bits, return its least-significant word. Which word
|| is that?  This code assumes the big-endian model.  An alternative
|| would be to load the long-long value and force C to convert it.  That would
|| be portable but would hit the bus twice instead of once, nullifying the
|| speed advantage that this routine has over the one following.
||
*****************************************************************************/
__uint32_t
readTimer32()
{
__uint32_t ret = 0;

    if ( ! TIMER_IS_MAPPED ) mapTheTimer();
    if ( TIMER_IS_MAPPED ) /* timer mapped ok */
    {
        if (64 == timerPrecision)
            ret = ((__uint32_t *)timerMapAddress)[1]; /* low word of 2 */
        else /* in IRIX 6.2, 32 bits is the only alternative */
            ret = *((__uint32_t *)timerMapAddress);
    }
    return ret;
}
/*****************************************************************************
||
|| When the timer has 32 bits, just fake up a long-long and return it.
|| For long timers we must ask: was this code compiled to an ABI that does
|| atomic loads of long-longs (-64 or -n32), or not (-32)?
|| In the newer ABIs, we just fetch the 64-bit timer in one move.
||
|| When compiled under a 32-bit system, the generated code loads the timer
|| value in two "lw" instructions.  The low word of the timer overflows into
|| the high word about every 90 seconds, and if that happens between the
|| lw's, the result will be wrong.  Worse, we cannot be certain which of the
|| two words the compiler will choose to load first, the low or the high.
||
|| In order to minimize the number of uncached accesses, we test for
|| overflow only when it has recently happened; that is, when
|| the most significant 9 bits of the low word are all-0.  This
|| condition defines a window of 0.17 seconds following the overflow
|| (21e-9 * 2^23 == .176160768, for a 21-nanosecond timer unit).
|| If this were kernel code, the window could be much smaller.  In enabled
|| code we have to allow for a series of interrupts between the load of the
|| upper and lower words.  As it is, if we load the upper word just before
|| overflow, and an interrupt delays the next fetch 0.17+ seconds, we will
|| return an incorrect value.
||
*****************************************************************************/
__uint64_t
readTimer64()
{
union {
    struct { __uint32_t msw,lsw; }w; 
    __uint64_t ll;
    } ret;

    ret.ll = 0;
    if ( ! TIMER_IS_MAPPED ) mapTheTimer();
    if ( TIMER_IS_MAPPED ) /* it mapped ok */
    {
        if (timerPrecision == 32)
        {
            ret.w.msw = 0;
            ret.w.lsw = *((__uint32_t *)timerMapAddress);
        }
        else
        {
#if (_MIPS_SIM == _MIPS_SIM_NABI32 || _MIPS_SIM == _MIPS_SIM_ABI64)
            /* 64-bit loads are atomic */
            ret.ll = *(__uint64_t *)timerMapAddress;
#else /* 64-bit loads are not atomic */
            ret.w.msw = ((__uint32_t *)timerMapAddress)[0];
            ret.w.lsw = ((__uint32_t *)timerMapAddress)[1];
            if ( (ret.w.lsw & 0xff800000) == 0)
            {
            /*
            || The high word incremented not more than .17 sec ago.
            || Provided there is not a delay here exceeding 89.8 sec,
            || the following single load ensures we have the high word
            || value that is correctly associated with the low word
            || we already picked up.
            */
                ret.w.msw = ((__uint32_t *)timerMapAddress)[0];
            }
#endif
        }
    }
    return ret.ll;
}

#ifdef UNIT_TEST
#include <stdio.h>
#include <stdlib.h>         /* for strtol() */

/*
|| Unit-test driver, compiled only when UNIT_TEST is defined.
||
|| usage: program [sample-count]
||
|| Maps the timer, reports its width and unit value, then prints
|| sample-count (default 10) successive deltas read with readTimer32()
|| and again with readTimer64().
||
|| Returns 0 on success, 1 for a malformed sample-count, or the errno value
|| left by mapTheTimer() when the timer cannot be mapped.
*/
int main(int argc, char*argv[])
{
    long    j;
    long    numTix = 10;
    unsigned int picosecs;
    unsigned int tbits;
    double  dmicsecs;

    if (argc > 1)
    {
        char *endOfNumber;

        errno = 0;
        numTix = strtol(argv[1], &endOfNumber, 10);
        if (endOfNumber == argv[1] || *endOfNumber != '\0' || errno == ERANGE)
        {
            fprintf(stderr, "usage: %s [sample-count]\n", argv[0]);
            return 1;
        }
    }

    picosecs = mapTheTimer();
    if (0 == picosecs)
    {
        /* Save errno first: perror() itself is allowed to change it. */
        int mapErrno = errno;

        perror("mapTheTimer");
        return mapErrno;
    }

    tbits = timerBitCount();
    dmicsecs = ((double)picosecs)/1e6;  /* picoseconds -> microseconds */
    printf("The timer has %u bits of precision\n",tbits);
    printf("One timer unit == %u picoseconds or %g us\n",
                                picosecs, dmicsecs);

    {
        __uint32_t st1, st2, stx;
        st1 = readTimer32();
        printf("\nreading timer as 32 bits\n\n");
        for(j=0; j<numTix; ++j)
        {
            st2 = readTimer32();
            stx = st2 - st1;    /* unsigned subtraction handles wraparound */
            printf("0x%x - 0x%x = 0x%x (%g usecs)\n",
                    (unsigned int)st2, (unsigned int)st1,
                    (unsigned int)stx, (stx*dmicsecs) );
            st1 = st2;
        }
    }

    {
        __uint64_t lt1, lt2, ltx;
        lt1 = readTimer64();
        printf("\nreading timer as 64 bits\n\n");
        for(j=0; j<numTix; ++j)
        {
            lt2 = readTimer64();
            ltx = lt2 - lt1;
            /* Cast so the arguments match %llx under every ABI. */
            printf("0x%llx - 0x%llx = 0x%llx (%g usecs)\n",
                    (unsigned long long)lt2, (unsigned long long)lt1,
                    (unsigned long long)ltx, (ltx * dmicsecs));
            lt1 = lt2;
        }
    }
    return 0;
}
#endif

Next | Prev | Up | Top | Contents | Index